# Start from an empty workspace.
# NOTE(review): this removes every object from the caller's global
# environment; running the script in a fresh R session would make it
# unnecessary.
rm(list = ls())

# Packages, attached in the same order as before (attach order decides
# which package wins when two export the same name).
library(tidyverse)
library(gridExtra)
library(ggprism)
library(stargazer)
library(psych)
library(xtable)
library(marginaleffects)
library(ggplot2)

# Read the speech data and show its first rows.
speeches <- read.csv(file = "speeches.csv")
head(speeches)


###
### Figure 3.1: Keyword Ratio in Speeches and Manifestos
### (a) Parliamentary speeches (1949-2020)
###
# Reshape to long format: one row per observation and measure, with the
# measure name in `name` and its value in `value`.
speech_crime_plot <- speeches %>%
  pivot_longer(cols = c(log_ratio, bias))
head(speech_crime_plot)

# The AfD is left out of this figure. The factor fixes the panel order.
speech_crime_plot <- subset(speech_crime_plot, party != "AfD")
speech_crime_plot$party <- factor(
  speech_crime_plot$party,
  levels = c("AfD", "CDU", "SPD", "FDP", "Greens", "Left")
)
levels(speech_crime_plot$party)

# Build one row of per-party time-series panels.
#
# The two rows of the figure used to be two copies of the same ~20-line plot
# specification; they differ only in the y-axis label and in whether the
# x-axis tick labels are drawn, so those are the only arguments.
#
# data        long-format data with columns year, value and party
# y_label     y-axis title
# x_axis_text theme element for the x-axis tick labels
#             (element_blank() hides them)
# Returns a ggplot object.
party_panel_plot <- function(data, y_label, x_axis_text) {
  ggplot(data, aes(year, value)) +
    theme_bw() +
    facet_wrap(~party, ncol = 5) +
    geom_line() +
    ylab(y_label) +
    geom_hline(yintercept = 0, linetype = "dashed") +
    # axis limits, breaks and minor breaks are left at their defaults
    scale_x_continuous(guide = "prism_minor", expand = c(0, 0)) +
    theme(
      axis.text.x = x_axis_text,
      axis.title.x = element_blank(),
      axis.text.y = element_text(size = 12),
      axis.title.y = element_text(size = 16),
      prism.ticks.length.x = unit(5, "pt"),        # length of minor tick marks
      legend.position = "none",                    # suppress legend
      strip.text.x = element_text(size = 14),      # size of facet strip text
      panel.spacing = unit(1, "lines"),            # extra space between panels
      plot.margin = unit(c(.5, .5, .1, .1), "cm"), # margin around each row
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank()
    )
}

# Top row: position (log_ratio), without x-axis tick labels.
position <- subset(speech_crime_plot, name == "log_ratio")
a.plot <- party_panel_plot(position, "Position", element_blank())

# Bottom row: bias, with x-axis tick labels.
bias <- subset(speech_crime_plot, name == "bias")
b.plot <- party_panel_plot(bias, "Bias", element_text(size = 12))

# combined into single PDF
pdf("Fig3_1a.pdf", height = 7, width = 12)
grid.arrange(a.plot, b.plot, nrow = 2)
dev.off()


###
### Table B.3: Regression of Keywords in Speeches on Parties
###
# Turn party into a factor whose first (reference) level is CDU/CSU, so all
# party coefficients below are differences to the CDU/CSU.
speeches$party <- relevel(
  factor(
    speeches$party,
    levels = c("AfD", "CDU", "FDP", "Greens", "Left", "SPD")
  ),
  ref = "CDU"
)

# Decade of each observation (e.g. 1987 -> 1980).
speeches$decade <- 10 * (speeches$year %/% 10)

# Columns 1-3: log_ratio; columns 4-6: bias.
# Within each outcome: no fixed effects, decade dummies, year dummies.
mod1 <- lm(log_ratio ~ as.factor(party), data = speeches)
mod2 <- lm(log_ratio ~ as.factor(party) + as.factor(decade), data = speeches)
mod3 <- lm(log_ratio ~ as.factor(party) + as.factor(year), data = speeches)
mod4 <- lm(bias ~ as.factor(party), data = speeches)
mod5 <- lm(bias ~ as.factor(party) + as.factor(decade), data = speeches)
mod6 <- lm(bias ~ as.factor(party) + as.factor(year), data = speeches)

stargazer(
  mod1, mod2, mod3, mod4, mod5, mod6,
  digits = 2,
  star.cutoffs = c(0.05, 0.01, NA),
  dep.var.caption = "Outcome variable: logged ratio",
  dep.var.labels.include = FALSE,
  no.space = TRUE,
  omit.stat = c("f", "rsq", "ser"),
  # footer rows: one label followed by one cell per model column
  add.lines = list(
    c("Year FEs", "", "", "checkmark", "", "", "checkmark"),
    c("Decade FEs", "", "checkmark", "", "", "checkmark", "")
  )
)


###
### Figure 3.2: Regression of Keywords in Speeches on Parties
###
# Party dummies in the order lm() reports them (CDU/CSU is the reference).
party_order <- c("AfD", "FDP", "Greens", "Left", "SPD")

# Estimate and standard error of the party dummies of a fitted model.
# Rows are picked by coefficient name instead of by position (2:6):
# summary.lm() leaves out coefficients that could not be estimated, so a
# positional index could silently return the wrong terms. A missing party
# coefficient now stops with a subscript error instead.
party_coefs <- function(model) {
  summary(model)$coefficients[
    paste0("as.factor(party)", party_order),
    c("Estimate", "Std. Error")
  ]
}

# Stack the party coefficients of the models without fixed effects (mod1,
# mod4) and with year fixed effects (mod3, mod6): position first, then bias.
ests_poll <- rbind.data.frame(
  party_coefs(mod1), party_coefs(mod3),
  party_coefs(mod4), party_coefs(mod6)
)
names(ests_poll) <- c("coef", "se")
ests_poll$party <- rep(party_order, 4)
# five rows per model; the two-model pattern recycles over both outcomes
ests_poll$model <- rep(c("w/o Fixed Effects", "w/ Year Fixed Effects"), each = 5)
ests_poll$model <- factor(
  ests_poll$model,
  levels = c("w/o Fixed Effects", "w/ Year Fixed Effects")
)
ests_poll$outcome <- rep(c("Position", "Bias"), each = 10)
ests_poll$outcome <- factor(ests_poll$outcome, levels = c("Position", "Bias"))

# Point estimates with 95% intervals (+/- 1.96 standard errors).
# NOTE(review): `group` is passed as a constant to the error-bar layer
# (outside aes()). It is kept exactly as it was because the dodging of the
# bars depends on it; check the rendered figure before changing it.
fig3_2 <- ggplot(
  data = ests_poll,
  aes(x = party, y = coef, group = factor(party), shape = factor(model))
) +
  geom_point(position = position_dodge2(0.5), size = 2.5) +
  geom_errorbar(
    aes(ymax = coef + 1.96 * se, ymin = coef - 1.96 * se),
    width = 0, position = position_dodge(0.5), group = factor("party")
  ) +
  theme_bw() +
  facet_wrap(~outcome) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(
    legend.position = "bottom",
    text = element_text(size = 18),
    panel.grid = element_blank()
  ) +
  xlab("") +
  ylab("Difference to CDU/CSU") +
  scale_shape_manual(values = c(16, 17), name = "")

# Print explicitly: a bare ggplot expression is only drawn through top-level
# auto-printing, which does not happen when the script is run via source(),
# leaving an empty PDF.
pdf("Fig3_2.pdf", width = 9, height = 5)
print(fig3_2)
dev.off()


###
### Figure B.1: Keyword Ratio in Speeches and Manifestos
### (a) Parliamentary speeches (1949-2020)
###
# Two-bloc comparison without the AfD: CDU and FDP are coded 0
# (center-right), every other remaining party 1 (center-left).
twolines <- subset(speeches, party != "AfD")
twolines$centerleft <- ifelse(
  (twolines$party == "CDU" | twolines$party == "FDP"), 0, 1
)
# cross-check the coding against party
table(twolines$centerleft, twolines$party, useNA = "ifany")

# Yearly mean of log_ratio within each bloc.
twolines <- aggregate(log_ratio ~ year + centerleft, data = twolines, FUN = mean)

fig_b1a <- ggplot(
  data = twolines,
  aes(x = year, y = log_ratio, color = as.factor(centerleft))
) +
  geom_point(aes(shape = as.factor(centerleft))) +
  geom_line() +
  # identical name and labels on both scales so colour and shape share a legend
  scale_color_manual(
    values = c("dodgerblue", "firebrick"),
    labels = c("Center-Right", "Center-Left"), name = ""
  ) +
  scale_shape_manual(
    values = c(17, 16),
    labels = c("Center-Right", "Center-Left"), name = ""
  ) +
  ylab("Logged ratio") +
  xlab("Year") +
  theme_classic(base_size = 22) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
  theme(panel.grid.minor = element_blank(), legend.position = "bottom") +
  # "none" replaces the deprecated `FALSE` (written as the reassignable `F`)
  guides(linetype = "none")

# Print explicitly: a bare ggplot expression is only drawn through top-level
# auto-printing, which does not happen when the script is run via source(),
# leaving an empty PDF.
pdf("FigB1a.pdf", height = 8, width = 14)
print(fig_b1a)
dev.off()


###
### Table B.1: Speeches: Descriptive Statistics
###
# Variables summarised in the table.
desc_vars <- c("count_right", "count_left", "log_ratio", "crime_log_ratio", "bias")
# Columns kept from describe()'s output; in psych's default layout these are
# n, mean, sd, min and max.
desc_stats <- c(2, 3, 4, 8, 9)

# Descriptive statistics of `desc_vars` in `data` as an xtable object.
describe_table <- function(data) {
  xtable(describe(data[, desc_vars])[, desc_stats])
}

# Tables are printed explicitly so the LaTeX output also appears when the
# script is run through source(), where top-level values are not auto-printed.

# Full sample
print(describe_table(speeches))

# One table per party, in the original order
for (party_name in c("CDU", "SPD", "FDP", "Greens", "Left", "AfD")) {
  print(describe_table(speeches[speeches$party == party_name, ]))
}


###
### Table B.5: Regression of Keywords in Speeches on Parties, No Logged Ratio
###
# Same six specifications as the main party regressions, with the
# outcomes `diff` (columns 1-3) and `bias_diff` (columns 4-6).
# Within each outcome: no fixed effects, decade dummies, year dummies.
mod1 <- lm(diff ~ as.factor(party), data = speeches)
mod2 <- lm(diff ~ as.factor(party) + as.factor(decade), data = speeches)
mod3 <- lm(diff ~ as.factor(party) + as.factor(year), data = speeches)
mod4 <- lm(bias_diff ~ as.factor(party), data = speeches)
mod5 <- lm(bias_diff ~ as.factor(party) + as.factor(decade), data = speeches)
mod6 <- lm(bias_diff ~ as.factor(party) + as.factor(year), data = speeches)

stargazer(
  mod1, mod2, mod3, mod4, mod5, mod6,
  digits = 2,
  star.cutoffs = c(0.05, 0.01, NA),
  dep.var.caption = "Outcome variable: difference",
  dep.var.labels.include = FALSE,
  no.space = TRUE,
  omit.stat = c("f", "rsq", "ser"),
  # footer rows: one label followed by one cell per model column
  add.lines = list(
    c("Year FEs", "", "", "checkmark", "", "", "checkmark"),
    c("Decade FEs", "", "checkmark", "", "", "checkmark", "")
  )
)


###
### Table B.7: Regression of Keywords in Speeches on Parties and Far-Right Polling
###
# Party x far-right polling interactions.
# Columns 1-3: log_ratio; columns 4-6: bias.
# Within each outcome: no time controls, decade dummies, decade dummies plus
# a linear term in year.
mod1 <- lm(log_ratio ~ as.factor(party)*polbar_fr_vote_pct, speeches)
mod2 <- lm(log_ratio ~ as.factor(party)*polbar_fr_vote_pct + as.factor(decade), speeches)
mod3 <- lm(log_ratio ~ as.factor(party)*polbar_fr_vote_pct + as.factor(decade) + year, speeches)
mod4 <- lm(bias ~ as.factor(party)*polbar_fr_vote_pct, speeches)
mod5 <- lm(bias ~ as.factor(party)*polbar_fr_vote_pct + as.factor(decade), speeches)
mod6 <- lm(bias ~ as.factor(party)*polbar_fr_vote_pct + as.factor(decade) + year, speeches)

# The footer rows describe the models as fitted above. Columns 3 and 6 enter
# `year` as a single linear term (not as.factor(year)) and keep the decade
# dummies, so they are no longer labelled "Year FEs" without decade FEs.
# NOTE(review): if year fixed effects were intended, change the model
# formulas instead of these labels.
stargazer(
  mod1, mod2, mod3, mod4, mod5, mod6,
  digits = 2,
  star.cutoffs = c(0.05, 0.01, NA),
  dep.var.caption = "Outcome variable: logged ratio",
  dep.var.labels.include = FALSE,
  no.space = TRUE,
  omit.stat = c("f", "rsq", "ser"),
  add.lines = list(
    c("Linear year trend", "", "", "checkmark", "", "", "checkmark"),
    c("Decade FEs", "", "checkmark", "checkmark", "", "checkmark", "checkmark")
  )
)


###
### Extract coefficients and SEs for 
### Figure 3.7: Effect of Far-Right Polling on Speeches, Manifestos, and Inquiries
### (see separate script)
###
# Models with decade dummies and a linear year term. Only these two feed the
# estimates below; the models without time controls were refitted here before
# but never used in this section.
mod3 <- lm(log_ratio ~ as.factor(party)*polbar_fr_vote_pct + as.factor(decade) + year, speeches)
mod6 <- lm(bias ~ as.factor(party)*polbar_fr_vote_pct + as.factor(decade) + year, speeches)

# Average slope of far-right polling within each party: rows for log_ratio
# (mod3) first, then bias (mod6). Columns are selected by name because their
# position in the avg_slopes() output is not guaranteed across
# marginaleffects versions.
slope_cols <- c("party", "estimate", "std.error")
ests_poll <- rbind.data.frame(
  avg_slopes(mod3, variables = "polbar_fr_vote_pct", by = "party")[, slope_cols],
  avg_slopes(mod6, variables = "polbar_fr_vote_pct", by = "party")[, slope_cols]
)
ests_poll <- as.data.frame(ests_poll)
names(ests_poll) <- c("party", "coef", "se")


###
### Table B.9: Regression of Keywords in Speeches on Parties – Restricted Set of Keywords
###
# Party regressions on the restricted-keyword outcomes.
# Columns 1-3: log_ratio_restricted; columns 4-6: bias_restricted.
# Within each outcome: no fixed effects, decade dummies, year dummies.
mod1 <- lm(log_ratio_restricted ~ as.factor(party), data = speeches)
mod2 <- lm(log_ratio_restricted ~ as.factor(party) + as.factor(decade), data = speeches)
mod3 <- lm(log_ratio_restricted ~ as.factor(party) + as.factor(year), data = speeches)
mod4 <- lm(bias_restricted ~ as.factor(party), data = speeches)
mod5 <- lm(bias_restricted ~ as.factor(party) + as.factor(decade), data = speeches)
mod6 <- lm(bias_restricted ~ as.factor(party) + as.factor(year), data = speeches)

stargazer(
  mod1, mod2, mod3, mod4, mod5, mod6,
  digits = 2,
  star.cutoffs = c(0.05, 0.01, NA),
  dep.var.caption = "Outcome variable: logged ratio",
  dep.var.labels.include = FALSE,
  no.space = TRUE,
  omit.stat = c("f", "rsq", "ser"),
  # footer rows: one label followed by one cell per model column
  add.lines = list(
    c("Year FEs", "", "", "checkmark", "", "", "checkmark"),
    c("Decade FEs", "", "checkmark", "", "", "checkmark", "")
  )
)


###
### Table B.11: Regression of Keywords in Speeches on Parties – Violent Crimes Only
###
# Party regressions with bias_violent as the outcome:
# no fixed effects, decade dummies, year dummies.
mod1 <- lm(bias_violent ~ as.factor(party), data = speeches)
mod2 <- lm(bias_violent ~ as.factor(party) + as.factor(decade), data = speeches)
mod3 <- lm(bias_violent ~ as.factor(party) + as.factor(year), data = speeches)

stargazer(
  mod1, mod2, mod3,
  digits = 2,
  star.cutoffs = c(0.05, 0.01, NA),
  dep.var.caption = "Outcome variable: bias",
  dep.var.labels.include = FALSE,
  no.space = TRUE,
  omit.stat = c("f", "rsq", "ser"),
  # footer rows: one label followed by one cell per model column
  add.lines = list(
    c("Year FEs", "", "", "checkmark"),
    c("Decade FEs", "", "checkmark", "")
  )
)


###
### Table B.13: Regression of Keywords in Speeches on Parties – Violent Crimes 2000-2020 Only
###
# Keep only observations with year after 1999 (rows with a missing year are
# dropped, as subset() would do).
since2000 <- speeches[which(speeches$year > 1999), , drop = FALSE]

# Same three bias_violent specifications, fitted on the restricted sample:
# no fixed effects, decade dummies, year dummies.
mod1 <- lm(bias_violent ~ as.factor(party), data = since2000)
mod2 <- lm(bias_violent ~ as.factor(party) + as.factor(decade), data = since2000)
mod3 <- lm(bias_violent ~ as.factor(party) + as.factor(year), data = since2000)

stargazer(
  mod1, mod2, mod3,
  digits = 2,
  star.cutoffs = c(0.05, 0.01, NA),
  dep.var.caption = "Outcome variable: bias",
  dep.var.labels.include = FALSE,
  no.space = TRUE,
  omit.stat = c("f", "rsq", "ser"),
  # footer rows: one label followed by one cell per model column
  add.lines = list(
    c("Year FEs", "", "", "checkmark"),
    c("Decade FEs", "", "checkmark", "")
  )
)


###
### Table B.15: Regression of Keywords in Speeches on Parties – Election Year Interactions
###
# Years flagged as election years.
bund_elections <- c(
  1949, 1953, 1957, 1961, 1965, 1969, 1972, 1976, 1980, 1983,
  1987, 1990, 1994, 1998, 2002, 2005, 2009, 2013, 2017, 2021
)

# 1 if the observation's year is in the list above, 0 otherwise.
speeches$electyear <- as.numeric(speeches$year %in% bund_elections)

# Party x election-year interactions.
# Columns 1-3: log_ratio; columns 4-6: bias.
# Within each outcome: no fixed effects, decade dummies, year dummies.
mod1 <- lm(log_ratio ~ as.factor(party) * electyear, data = speeches)
mod2 <- lm(log_ratio ~ as.factor(party) * electyear + as.factor(decade), data = speeches)
mod3 <- lm(log_ratio ~ as.factor(party) * electyear + as.factor(year), data = speeches)
mod4 <- lm(bias ~ as.factor(party) * electyear, data = speeches)
mod5 <- lm(bias ~ as.factor(party) * electyear + as.factor(decade), data = speeches)
mod6 <- lm(bias ~ as.factor(party) * electyear + as.factor(year), data = speeches)

stargazer(
  mod1, mod2, mod3, mod4, mod5, mod6,
  digits = 2,
  star.cutoffs = c(0.05, 0.01, NA),
  dep.var.caption = "Outcome variable: logged ratio",
  dep.var.labels.include = FALSE,
  no.space = TRUE,
  omit.stat = c("f", "rsq", "ser"),
  # footer rows: one label followed by one cell per model column
  add.lines = list(
    c("Year FEs", "", "", "checkmark", "", "", "checkmark"),
    c("Decade FEs", "", "checkmark", "", "", "checkmark", "")
  )
)

